1 Functions

2 Data

# read processed data:
SCC1_cancer  = readRDS(file = "./Data/cervical_cancer_data/Qiu et al/SCC1/SCC1_cancer.RDS")
SCC2_cancer  = readRDS(file = "./Data/cervical_cancer_data/Qiu et al/SCC2/SCC2_cancer_processed.RDS")
SCC3_cancer = readRDS(file = "./Data/cervical_cancer_data/Qiu et al/SCC3/SCC3_cancer_processed.RDS")
SCC4_cancer = readRDS(file = "./Data/cervical_cancer_data/Qiu et al/SCC4/SCC4_cancer_processed.RDS")
SCC5_cancer  = readRDS(file = "./Data/cervical_cancer_data/Qiu et al/SCC5/SCC5_cancer_processed.RDS")

scc.big <- merge(SCC1_cancer, y = c(SCC2_cancer,SCC3_cancer, SCC4_cancer,SCC5_cancer), add.cell.ids = c("SCC1","SCC2", "SCC3", "SCC4","SCC5"), project = "SCC")
scc.big$orig.ident =  sapply(X = strsplit(colnames(scc.big), split = "_"), FUN = "[", 1)


VlnPlot(object = scc.big,features = "MYB",group.by = "orig.ident",slot = "data", assay = "RNA")

NA
NA
b = FetchData(object = scc.big,vars = c("MYB","orig.ident"),slot = "data", assay = "RNA")
b %>%   group_by(orig.ident) %>% 
  summarise(counts = sum(MYB > 0, na.rm = TRUE))

3 Violin plot all patients without scc2

scc_myb_patients<- subset(scc.big, subset = orig.ident %in% c("SCC3","SCC4","SCC5"))
library(rstatix)
myb_vs_hpv = FetchData(object = scc_myb_patients, vars = c("hpv_positive", "MYB"))
df = reshape2::melt(myb_vs_hpv,value.name = "logTPM") %>% dplyr::rename(gene = variable)
Using hpv_positive as id variables
stat.test <- df %>%
  wilcox_test(logTPM ~ hpv_positive)

stat.test



p = ggplot(myb_vs_hpv,aes( x = hpv_positive, y = MYB))+ 
    geom_violin(trim=FALSE,aes(fill = hpv_positive)) +
  theme_minimal()+
  stat_summary(fun.data = function(x) data.frame(y=max(x)*1.15, label = paste("Mean=",round(mean(x),digits = 2))), geom="text") +ylab("average MYB")+ggtitle("all patients") +
  stat_pvalue_manual(stat.test,y.position = 3, label = "Wilcox, p = {p}",remove.bracket = F)+
  geom_boxplot(width=.1, outlier.shape=NA) 


p

pdf(file = "./Figures/SCC_myb_hpv_violin_all_patients.pdf")
p
dev.off()
null device 
          1 

4 Violin plot all patients with scc2

scc_myb_patients<- subset(scc.big, subset = orig.ident %in% c("SCC2","SCC3","SCC4","SCC5"))
library(rstatix)
myb_vs_hpv = FetchData(object = scc_myb_patients, vars = c("hpv_positive", "MYB"))
df = reshape2::melt(myb_vs_hpv,value.name = "logTPM") %>% dplyr::rename(gene = variable)
Using hpv_positive as id variables
stat.test <- df %>%
  wilcox_test(logTPM ~ hpv_positive)

stat.test



p = ggplot(myb_vs_hpv,aes( x = hpv_positive, y = MYB))+ 
    geom_violin(trim=FALSE,aes(fill = hpv_positive)) +
  theme_minimal()+
  stat_summary(fun.data = function(x) data.frame(y=max(x)*1.15, label = paste("Mean=",round(mean(x),digits = 2))), geom="text") +ylab("average MYB")+ggtitle("all patients") +
  stat_pvalue_manual(stat.test,y.position = 3, label = "Wilcox, p = {p}",remove.bracket = F)+
  geom_boxplot(width=.1, outlier.shape=NA) 


p

5 Boxplot by patient

pdf(file = "./Figures/SCC_HPV_MYB_boxplot_bySample.pdf",width = 8)
p
dev.off()
null device 
          1 

6 All genes boxplots

genes = c("ANLN", "TUBB6", "AXL", "IFT122", "GFM1", "MYB", "JAG1")
myb_vs_hpv = FetchData(object = scc_myb_patients,vars = c("hpv_positive",genes))
df = reshape2::melt(myb_vs_hpv,value.name = "logTPM") %>% dplyr::rename(gene = variable)
Using hpv_positive as id variables
stat.test <- df %>%
    group_by(gene) %>%
  wilcox_test(logTPM ~ hpv_positive) %>%
  mutate(y.position = 5)

stat.test

stat.test <- stat.test %>% 
  add_xy_position(x = "gene", dodge = 0.8)

p = ggboxplot(
  df,
  x = "gene",
  y = "logTPM",
  color = "hpv_positive",
  palette = "jco",
  add = "jitter"
)+ stat_pvalue_manual(stat.test,y.position = 4, label = "p = {p}",remove.bracket = T)

p

pdf(file = "./Figures/SCC_HPV_genes_all_patients.pdf",width = 8)
p
dev.off()
metagenes_violin_compare.2 = function(dataset,prefix = "",pre_on = c("OSI","NT"),axis.text.x = 11,test = "t.test", programs = c("Hypoxia","TNFa","Cell_cycle"),return_list = F,combine_patients = F){
  require(facefuns)
  plt.lst = list()
  if(combine_patients){
    genes_by_tp = FetchData(object = dataset,vars =  c("treatment",programs)) %>% filter(treatment %in% pre_on)  %>% as.data.frame() #mean expression
    formula <- as.formula( paste("c(", paste(programs, collapse = ","), ")~ treatment ") )
    
    #plot and split by patient:   
    stat.test = compare_means(formula = formula ,data = genes_by_tp,method = test,p.adjust.method = "fdr")%>% # Add pairwise comparisons p-value
      dplyr::filter(group1 == pre_on[1] & group2 == pre_on[2])  #filter for pre vs on treatment only
    
    stat.test$p.format =stat.test$p.adj #modift 0 pvalue to be lowest possible float
    stat.test$p.format[!stat.test$p.format == 0 ] <- paste("=",stat.test$p.format[!stat.test$p.format == 0 ])
    stat.test$p.format[stat.test$p.format == 0 ] <- paste("<",.Machine$double.xmin %>% signif(digits = 3))
    
    
    genes_by_tp = reshape2::melt(genes_by_tp, id.vars = c("treatment"),value.name = "score")
    plt = ggplot(genes_by_tp, aes(x = variable, y = score,fill = treatment)) + geom_split_violin(scale = 'width')+ 
      geom_boxplot(width = 0.25, notch = FALSE, notchwidth = .4, outlier.shape = NA, coef=0)+
      ylim(min(genes_by_tp$score),max(genes_by_tp$score)*1.25)
    plt = plt +stat_pvalue_manual(stat.test, label = "{p.signif}", label.size = 7,  #add p value
                                  y.position = 1.5,inherit.aes = F,size = 3.3,x = ".y.") # set position at the top value
    return(plt)
  }
  
  
  
  
  
  if (return_list) {
    return(plt.lst)
  }
}
scc_myb_patients$treatment = scc_myb_patients$hpv_positive %>% gsub(pattern = "negative",replacement = "HPV-negative")%>% gsub(pattern = "positive",replacement = "HPV-positive")
Error in h(simpleError(msg, call)) : 
  error in evaluating the argument 'x' in selecting a method for function 'gsub': error in evaluating the argument 'x' in selecting a method for function 'gsub': object 'scc_myb_patients' not found
pdf(file = "./Figures/SCC_HPV_Top_violin.pdf",width = 10,height = 5)
p
Warning: Removed 392 rows containing non-finite values (stat_ydensity).
Warning: Removed 392 rows containing non-finite values (stat_boxplot).
dev.off()
null device 
          1 
myplots <- list()  # new empty list

for (patient_name in c("SCC2", "SCC3", "SCC4", "SCC5")) {
  patient_data = subset(x = scc_myb_patients, subset = orig.ident == patient_name)
  df  = FetchData(object = patient_data,
                  vars = c("hpv_positive", "MYB_positive")) %>% droplevels()
  test = fisher_test(table(df))
  p = ggbarstats(
    df,
    MYB_positive,
    hpv_positive,
    results.subtitle = FALSE,
    subtitle = paste0("Fisher's exact test", ", p-value = ",
                      test$p)
  ,title = patient_name)
  myplots[[patient_name]] <- p  # add each plot into plot list

}
ggarrange(plotlist = myplots,ncol = 4,nrow = 1)

  fisher.test(table(df))

    Fisher's Exact Test for Count Data

data:  table(df)
p-value < 2.2e-16
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
 2.236243 3.479375
sample estimates:
odds ratio 
  2.786281 
  fisher_test(table(df))
NA
---
title: '`r rstudioapi::getSourceEditorContext()$path %>% basename() %>% gsub(pattern = "\\.Rmd",replacement = "")`' 
author: "Avishai Wizel"
date: '`r Sys.time()`'
output: 
  html_notebook: 
    code_folding: hide
    toc: yes
    toc_collapse: yes
    toc_float: 
      collapsed: FALSE
    number_sections: true
    toc_depth: 1
---



# Functions

```{r warning=FALSE}
```

```{r}
```

# Data


```{r}
# read processed data:
SCC1_cancer  = readRDS(file = "./Data/cervical_cancer_data/Qiu et al/SCC1/SCC1_cancer.RDS")
SCC2_cancer  = readRDS(file = "./Data/cervical_cancer_data/Qiu et al/SCC2/SCC2_cancer_processed.RDS")
SCC3_cancer = readRDS(file = "./Data/cervical_cancer_data/Qiu et al/SCC3/SCC3_cancer_processed.RDS")
SCC4_cancer = readRDS(file = "./Data/cervical_cancer_data/Qiu et al/SCC4/SCC4_cancer_processed.RDS")
SCC5_cancer  = readRDS(file = "./Data/cervical_cancer_data/Qiu et al/SCC5/SCC5_cancer_processed.RDS")

scc.big <- merge(SCC1_cancer, y = c(SCC2_cancer,SCC3_cancer, SCC4_cancer,SCC5_cancer), add.cell.ids = c("SCC1","SCC2", "SCC3", "SCC4","SCC5"), project = "SCC")
scc.big$orig.ident =  sapply(X = strsplit(colnames(scc.big), split = "_"), FUN = "[", 1)


VlnPlot(object = scc.big,features = "MYB",group.by = "orig.ident",slot = "data", assay = "RNA")


```


```{r}
myb_data = FetchData(object = scc.big,vars = c("MYB","orig.ident"),slot = "data", assay = "RNA")
myb_data %>%   group_by(orig.ident) %>% 
  summarise(counts = sum(MYB > 0, na.rm = TRUE))
```


# Violin plot all patients without scc2
```{r fig.height=6, fig.width=12}
scc_myb_patients<- subset(scc.big, subset = orig.ident %in% c("SCC3","SCC4","SCC5"))
```


```{r}
library(rstatix)
myb_vs_hpv = FetchData(object = scc_myb_patients, vars = c("hpv_positive", "MYB"))
df = reshape2::melt(myb_vs_hpv,value.name = "logTPM") %>% dplyr::rename(gene = variable)

stat.test <- df %>%
  wilcox_test(logTPM ~ hpv_positive)

stat.test



p = ggplot(myb_vs_hpv,aes( x = hpv_positive, y = MYB))+ 
    geom_violin(trim=FALSE,aes(fill = hpv_positive)) +
  theme_minimal()+
  stat_summary(fun.data = function(x) data.frame(y=max(x)*1.15, label = paste("Mean=",round(mean(x),digits = 2))), geom="text") +ylab("average MYB")+ggtitle("all patients") +
  stat_pvalue_manual(stat.test,y.position = 3, label = "Wilcox, p = {p}",remove.bracket = F)+
  geom_boxplot(width=.1, outlier.shape=NA) 


p
```
```{r}
pdf(file = "./Figures/SCC_myb_hpv_violin_all_patients.pdf")
p
dev.off()
```
# Violin plot all patients with scc2

```{r fig.height=6, fig.width=12}
scc_myb_patients<- subset(scc.big, subset = orig.ident %in% c("SCC2","SCC3","SCC4","SCC5"))
```


```{r}
library(rstatix)
myb_vs_hpv = FetchData(object = scc_myb_patients, vars = c("hpv_positive", "MYB"))
df = reshape2::melt(myb_vs_hpv,value.name = "logTPM") %>% dplyr::rename(gene = variable)

stat.test <- df %>%
  wilcox_test(logTPM ~ hpv_positive)

stat.test



p = ggplot(myb_vs_hpv,aes( x = hpv_positive, y = MYB))+ 
    geom_violin(trim=FALSE,aes(fill = hpv_positive)) +
  theme_minimal()+
  stat_summary(fun.data = function(x) data.frame(y=max(x)*1.15, label = paste("Mean=",round(mean(x),digits = 2))), geom="text") +ylab("average MYB")+ggtitle("all patients") +
  stat_pvalue_manual(stat.test,y.position = 3, label = "Wilcox, p = {p}",remove.bracket = F)+
  geom_boxplot(width=.1, outlier.shape=NA) 


p
```


# Boxplot by patient
```{r}
scc_myb_patients<- subset(scc.big, subset = orig.ident %in% c("SCC2","SCC3","SCC4","SCC5"))

df = FetchData(object = scc_myb_patients,vars = c("orig.ident","MYB","hpv_positive")) %>% group_by(orig.ident,hpv_positive) %>% summarise(
  myb = mean(MYB),.groups = "drop_last")
stat.test <- df %>% group_by("hpv_positive") %>% 
  wilcox_test(myb ~ hpv_positive)


df = reshape2::dcast(df, orig.ident ~ hpv_positive)


p = ggpaired(df, cond1  = "positive",cond2   = "negative",palette = "jco",
            add = "jitter", line.color = "gray", line.size = 0.4,id = "orig.ident", fill = "condition") + theme_minimal()+
  # stat_compare_means(method = "wilcox.test",comparisons = list(c("positive","negative")),paired = F)+
  stat_summary(fun.data = function(x) data.frame(y=max(x)*1.2, label = paste("Mean=",round(mean(x),digits = 2))), geom="text")+ylab("MYB")+ stat_pvalue_manual(stat.test,y.position = 0.2, label = "Wilcox, p = {p}",remove.bracket = F)
p
```
```{r}
pdf(file = "./Figures/SCC_HPV_MYB_boxplot_bySample.pdf",width = 8)
p
dev.off()
```
# All genes boxplots

```{r fig.width=8}
genes = c("ANLN", "TUBB6", "AXL", "IFT122", "GFM1", "MYB", "JAG1")
myb_vs_hpv = FetchData(object = scc_myb_patients,vars = c("hpv_positive",genes))
df = reshape2::melt(myb_vs_hpv,value.name = "logTPM") %>% dplyr::rename(gene = variable)


stat.test <- df %>%
    group_by(gene) %>%
  wilcox_test(logTPM ~ hpv_positive) %>%
  mutate(y.position = 5)

stat.test

stat.test <- stat.test %>% 
  add_xy_position(x = "gene", dodge = 0.8)

p = ggboxplot(
  df,
  x = "gene",
  y = "logTPM",
  color = "hpv_positive",
  palette = "jco",
  add = "jitter"
)+ stat_pvalue_manual(stat.test,y.position = 4, label = "p = {p}",remove.bracket = T)

p
```
```{r}
pdf(file = "./Figures/SCC_HPV_genes_all_patients.pdf",width = 8)
p
dev.off()
```

```{r}
metagenes_violin_compare.2 = function(dataset,prefix = "",pre_on = c("OSI","NT"),axis.text.x = 11,test = "t.test", programs = c("Hypoxia","TNFa","Cell_cycle"),return_list = F,combine_patients = F){
  require(facefuns)
  plt.lst = list()
  if(combine_patients){
    genes_by_tp = FetchData(object = dataset,vars =  c("treatment",programs)) %>% filter(treatment %in% pre_on)  %>% as.data.frame() #mean expression
    formula <- as.formula( paste("c(", paste(programs, collapse = ","), ")~ treatment ") )
    
    #plot and split by patient:   
    stat.test = compare_means(formula = formula ,data = genes_by_tp,method = test,p.adjust.method = "fdr")%>% # Add pairwise comparisons p-value
      dplyr::filter(group1 == pre_on[1] & group2 == pre_on[2])  #filter for pre vs on treatment only
    
    stat.test$p.format =stat.test$p.adj #modift 0 pvalue to be lowest possible float
    stat.test$p.format[!stat.test$p.format == 0 ] <- paste("=",stat.test$p.format[!stat.test$p.format == 0 ])
    stat.test$p.format[stat.test$p.format == 0 ] <- paste("<",.Machine$double.xmin %>% signif(digits = 3))
    
    
    genes_by_tp = reshape2::melt(genes_by_tp, id.vars = c("treatment"),value.name = "score")
    plt = ggplot(genes_by_tp, aes(x = variable, y = score,fill = treatment)) + geom_split_violin(scale = 'width')+ 
      geom_boxplot(width = 0.25, notch = FALSE, notchwidth = .4, outlier.shape = NA, coef=0)+
      ylim(min(genes_by_tp$score),max(genes_by_tp$score)*1.25)
    plt = plt +stat_pvalue_manual(stat.test, label = "{p.signif}", label.size = 7,  #add p value
                                  y.position = 1.5,inherit.aes = F,size = 3.3,x = ".y.") # set position at the top value
    return(plt)
  }
  
  
  
  
  
  if (return_list) {
    return(plt.lst)
  }
}
```

```{r fig.height=5, fig.width=10}
scc_myb_patients$treatment = scc_myb_patients$hpv_positive %>% gsub(pattern = "negative",replacement = "HPV-negative")%>% gsub(pattern = "positive",replacement = "HPV-positive")
scc_myb_patients@meta.data[["treatment"]] = factor(scc_myb_patients$treatment, levels = c("HPV-positive", "HPV-negative"))

p = metagenes_violin_compare.2(dataset = scc_myb_patients, prefix = "patient",pre_on = c("HPV-negative","HPV-positive"),test = "wilcox.test",programs = top_genes, return_list = F,combine_patients = T) +scale_y_continuous(limits = c(0,1.5)) + labs(fill = "")+ylab("LogTPM")+xlab("Gene")+theme(axis.title=element_text(size=14))+ scale_fill_manual(values=c("#F8766D", "cyan3"))
p
```


```{r}
pdf(file = "./Figures/SCC_HPV_Top_violin.pdf",width = 10,height = 5)
p
dev.off()
```

```{r fig.height=7, fig.width=13}
myplots <- list()  # new empty list

for (patient_name in c("SCC2", "SCC3", "SCC4", "SCC5")) {
  patient_data = subset(x = scc_myb_patients, subset = orig.ident == patient_name)
  df  = FetchData(object = patient_data,
                  vars = c("hpv_positive", "MYB_positive")) %>% droplevels()
  test = fisher_test(table(df))
  p = ggbarstats(
    df,
    MYB_positive,
    hpv_positive,
    results.subtitle = FALSE,
    subtitle = paste0("Fisher's exact test", ", p-value = ",
                      test$p)
  ,title = patient_name)
  myplots[[patient_name]] <- p  # add each plot into plot list

}
ggarrange(plotlist = myplots,ncol = 4,nrow = 1)

```
```{r}
  fisher.test(table(df))
  fisher_test(table(df))

```

<script src="https://hypothes.is/embed.js" async></script>

